# Script to produce Table 1, Figure 1
# Gould et al. 2023 EHP

# 0. Libraries -------

library(tidyverse)
library(haven)
library(cowplot)
library(arsenal)
library(sf)

# Specify how we want summary tables to look (from arsenal package)
mycontrols  <- tableby.control(test=TRUE, total=FALSE,
                               numeric.stats=c("meansd", "medianq1q3"),
                               cat.stats=c("countpct"),
                               stats.labels=list(countpct='N (%)', meansd='Mean (SD)', medianq1q3="Median (IQR)", range = "Range"),
                               digits = 2L,
                               digits.pct = 0L,
                               digits.p = 3L,
                               pfootnote = T)

# 1. Data ---------

# Canton ID matching and population
canton_Ns <- read_csv("~/BurkeLab Dropbox/Carlos Gould/Ecuador National Health and LPG Analysis/Manuscript/Submission/LGH/Data/canton_id_universal.csv") %>%
  dplyr::select(-`...1`)


# Shapefiles
ecu_shp1 <- sf::st_read("~/Downloads/ecu_adm_inec_20190724_shp/ecu_admbnda_adm1_inec_20190724.shp")
ecu_shp2 <- sf::st_read("~/Downloads/ecu_adm_inec_20190724_shp/ecu_admbnda_adm2_inec_20190724.shp")
ecu_shp2_df_bind <- read_rds("~/BurkeLab Dropbox/Carlos Gould/Ecuador National Health and LPG Analysis/Code/May2020/OtherCovariates/period-avgs/ecu_shp2_df_bind.rds")
cf_province <- read_rds("~/BurkeLab Dropbox/Carlos Gould/Ecuador National Health and LPG Analysis/Code/DataProducts/CF_Predictions/province_cf_period_avg.rds")

# Main data ------
full_df <- read_rds("~/Downloads/gould_ehp_ecuador_cf_df.rds")

# Table 1 --------

# Full time period -----
table_full <- tableby(~ ALRI5_5_plus + # period average cantonal under-5 LRI mortality count
                        ALRI5_5_plus_100kpop + # period average cantonal under-5 LRI mortality rate per 100k under-5 pop
                        cf + # % clean fuel use
                        under5population + # under 5 pop
                        percent_rural_corrected_ratio + # % rural
                        electricidad_no + # % not electrified
                        material_techo_nicer_all + # % better roofing
                        material_pared_hormigon_bloque_ladrillo + # % better wall materials
                        material_piso_nicer + # % better flooring
                        materials_index + # household building materials index
                        obtiene_agua_redpublica_tuberia_adentro_use + # % gets water through tubing indoors from public lines
                        elimina_servidas_red_pozo_letrina + # % better solid waste management
                        servicio_ducha_excluso + # % has own shower
                        elimina_basura_service + # % trash picked up via service
                        hygiene_index + # household hygiene and sanitation index
                        Literate_women + # % women literate
                        InSchool_girls + # % girls under 18 y in shool
                        Idioma_indigena_self + # % speak indigenous language in household or self
                        BCG_use + # % BCG vaccination under-5
                        DPT3_use + # % DPT-3 vaccination under-5
                        SAR_use + # % sarampion vaccination under-5
                        Polio_use + # % polio vaccination under-5
                        pcv3_use + # % pneumoccocal conjugate vaccine 3 doses under-5
                        vaccine_index + # vaccine index
                        mage_mean + # maternal age mean (age at delivery)
                        ANC_use + # % received any antenatal care
                        ANC_visits_median_use, # if received antenatal care, median visits
                      data = full_df,
                      control = mycontrols)


summary(table_full)

# This code can be used to save as a word document
# write2word(summary(table_full), "~/Downloads/table_full.doc")

# Period -------

table_full_period <- tableby(period ~ 
                               ALRI5_5_plus + # period average cantonal under-5 LRI mortality count
                               ALRI5_5_plus_100kpop + # period average cantonal under-5 LRI mortality rate per 100k under-5 pop
                               cf + # % clean fuel use
                               under5population + # under 5 pop
                               percent_rural_corrected_ratio + # % rural
                               electricidad_no + # % not electrified
                               material_techo_nicer_all + # % better roofing
                               material_pared_hormigon_bloque_ladrillo + # % better wall materials
                               material_piso_nicer + # % better flooring
                               materials_index + # household building materials index
                               obtiene_agua_redpublica_tuberia_adentro_use + # % gets water through tubing indoors from public lines
                               elimina_servidas_red_pozo_letrina + # % better solid waste management
                               servicio_ducha_excluso + # % has own shower
                               elimina_basura_service + # % trash picked up via service
                               hygiene_index + # household hygiene and sanitation index
                               Literate_women + # % women literate
                               InSchool_girls + # % girls under 18 y in shool
                               Idioma_indigena_self + # % speak indigenous language in household or self
                               BCG_use + # % BCG vaccination under-5
                               DPT3_use + # % DPT-3 vaccination under-5
                               SAR_use + # % sarampion vaccination under-5
                               Polio_use + # % polio vaccination under-5
                               pcv3_use + # % pneumoccocal conjugate vaccine 3 doses under-5
                               vaccine_index + # vaccine index
                               mage_mean + # maternal age mean (age at delivery)
                               ANC_use + # % received any antenatal care
                               ANC_visits_median_use, # if received antenatal care, median visits
                             data = full_df,
                             control = mycontrols)

summary(table_full)

# Figure 1: maps of %CF, under-5 LRI mortality rates, and under-5 population --------

# % CF --------

canton_cf_map_1990 <- 
  ggplot(data=ecu_shp2_df_bind) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=cf_1990), colour ="white", size= .0005) +
  geom_sf(data=ecu_shp1, alpha=0, color="white", size = .5) + 
  scale_fill_viridis_c(limits = c(0, 1)) +
  coord_sf(xlim = c(-81.175, -75.13), ylim = c(-5.09, 1.493)) + 
  labs(fill = "% primary clean fuel use") +
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        # legend.position = c(0.83, 0.20),
        legend.position = "none",
        plot.title.position = "plot",
        plot.title = element_text(size=24))

canton_cf_map_1990_galapagos <- ggplot(data=ecu_shp2_df_bind %>% 
                                         mutate(Galapagos = str_sub(ADM2_PCODE, 0, 4)) %>% 
                                         filter(Galapagos == "EC20")) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=cf_1990),colour ="white", size= .0005) +
  scale_fill_viridis_c(limits = c(0, 1)) +
  # coord_fixed() + 
  labs(fill = "% primary clean fuel use") +
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        # legend.position = c(0.83, 0.20),
        legend.position = "none",
        plot.title.position = "plot",
        plot.title = element_text(size=24))

canton_cf_map_1990_full <- ggdraw() +
  draw_plot(canton_cf_map_1990, x = 0, y = 0, width = 1, height = 1) +
  draw_plot(canton_cf_map_1990_galapagos, x = 0.55, y = 0, width = 0.40, height = .35)


canton_cf_map_2001 <- ggplot(data=ecu_shp2_df_bind) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=cf_2001), colour ="white", size= .0005) +
  geom_sf(data=ecu_shp1, alpha=0, color="white", size = 0.25) + 
  scale_fill_viridis_c(limits = c(0, 1)) +
  coord_sf(xlim = c(-81.175, -75.13), ylim = c(-5.09, 1.493)) + 
  labs(fill = "% primary clean fuel use") +
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        # legend.position = c(0.83, 0.20),
        legend.position = "none",
        plot.title.position = "plot",
        plot.title = element_text(size=24))

canton_cf_map_2001_galapagos <- ggplot(data=ecu_shp2_df_bind %>% 
                                         mutate(Galapagos = str_sub(ADM2_PCODE, 0, 4)) %>% 
                                         filter(Galapagos == "EC20")) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=cf_2001), colour ="white", size= .0005) +
  scale_fill_viridis_c(limits = c(0, 1)) +
  # coord_fixed() + 
  labs(fill = "% primary clean fuel use") +
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        # legend.position = c(0.83, 0.20),
        legend.position = "none",
        plot.title.position = "plot",
        plot.title = element_text(size=24))

canton_cf_map_2001_full <- ggdraw() +
  draw_plot(canton_cf_map_2001, x = 0, y = 0, width = 1, height = 1) +
  draw_plot(canton_cf_map_2001_galapagos, x = 0.55, y = 0, width = 0.40, height = .35)


canton_cf_map_2010 <- ggplot(data=ecu_shp2_df_bind) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=cf_2010), colour ="white", size= .0005) +
  geom_sf(data=ecu_shp1, alpha=0, color="white", size = 0.25) + 
  scale_fill_viridis_c(limits = c(0, 1)) +
  coord_sf(xlim = c(-81.175, -75.13), ylim = c(-5.09, 1.493)) + 
  labs(fill = "% primary clean fuel use") +
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        # legend.position = c(0.83, 0.20),
        legend.position = "none",
        plot.title.position = "plot",
        plot.title = element_text(size=24))

canton_cf_map_2010_galapagos <- ggplot(data=ecu_shp2_df_bind %>% 
                                         mutate(Galapagos = str_sub(ADM2_PCODE, 0, 4)) %>% 
                                         filter(Galapagos == "EC20")) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=cf_2010), colour ="white", size= .0005) +
  scale_fill_viridis_c(limits = c(0, 1)) +
  # coord_fixed() + 
  labs(fill = "% primary clean fuel use") +
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        # legend.position = c(0.83, 0.20),
        legend.position = "none",
        plot.title.position = "plot",
        plot.title = element_text(size=24))


canton_cf_map_2010_full <- ggdraw() +
  draw_plot(canton_cf_map_2010, x = 0, y = 0, width = 1, height = 1) +
  draw_plot(canton_cf_map_2010_galapagos, x = 0.55, y = 0, width = 0.40, height = .35)


canton_cf_map_2015 <- ggplot(data=ecu_shp2_df_bind) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=cf_2015), colour ="white", size= .0005) +
  geom_sf(data=ecu_shp1, alpha=0, color="white", size = 0.25) + 
  scale_fill_viridis_c(limits = c(0, 1)) +
  coord_sf(xlim = c(-81.175, -75.13), ylim = c(-5.09, 1.493)) + 
  labs(fill = "% primary clean fuel use") +
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        # legend.position = c(0.83, 0.20),
        legend.position = "none",
        plot.title.position = "plot",
        plot.title = element_text(size=24))

canton_cf_map_2015_galapagos <- ggplot(data=ecu_shp2_df_bind %>% 
                                         mutate(Galapagos = str_sub(ADM2_PCODE, 0, 4)) %>% 
                                         filter(Galapagos == "EC20")) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=cf_2015), colour ="white", size= .0005) +
  scale_fill_viridis_c(limits = c(0, 1)) +
  # coord_fixed() + 
  labs(fill = "Primary clean fuel use") +
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        # legend.position = c(0.83, 0.20),
        legend.position = "none",
        plot.title.position = "plot",
        plot.title = element_text(size=24))


canton_cf_map_2015_full <- ggdraw() +
  draw_plot(canton_cf_map_2015, x = 0, y = 0, width = 1, height = 1) +
  draw_plot(canton_cf_map_2015_galapagos, x = 0.55, y = 0, width = 0.40, height = .35)


cf_map_legend <- get_legend(
  ggplot(data=ecu_shp2_df_bind %>% 
           mutate(Galapagos = str_sub(ADM2_PCODE, 0, 4)) %>% 
           filter(Galapagos == "EC20")) + 
    ggmap::theme_nothing() +
    geom_sf(aes(fill=cf_2015), colour ="white", size= .0005) +
    scale_fill_viridis_c(limits = c(0, 1),
                         breaks=c(0, .25, .5, .75, 1),
                         labels=scales::percent_format()) +
    # coord_fixed() + 
    labs(fill = "Primary clean\nfuel use") +
    theme(legend.title = element_text(size=10),
          legend.text = element_text(size=8),
          # legend.position = c(0.83, 0.20),
          legend.position = "bottom",
          legend.key.height = unit(0.5, 'cm'),
          legend.key.width = unit(0.75, 'cm'),
          plot.title.position = "plot",
          plot.title = element_text(size=24))
)

cf_map_full <- plot_grid(
  
  canton_cf_map_1990_full,
  canton_cf_map_2001_full,
  canton_cf_map_2010_full,
  canton_cf_map_2015_full,
  cf_map_legend,
  nrow=1,
  rel_widths=c(1, 1, 1, 1, 0.6),
  labels = c("1988-1992", "1999-2003", "2008-2012", "2015-2019", ""),
  label_size = 12,
  # label_fontface = "bold",
  label_y =  0.95,
  label_x = c(0, 0, 0, -0.15, 0)
  
)


# under 5 LRI mortalities --------


ecu_shp2_df_bind1_lri <- 
  ecu_shp2_df_bind %>% 
  left_join(full_df %>% 
              dplyr::select(canton_id_universal, period, ALRI5_5_plus_100kpop) %>% 
              distinct() %>% 
              mutate(ADM2_PCODE = paste0("EC", str_sub(canton_id_universal, 0, 2), str_sub(canton_id_universal, 4, 5))) %>% 
              pivot_wider(names_from = period, values_from = c(ALRI5_5_plus_100kpop)), by ="ADM2_PCODE")

my_breaks_lri <- c(0, 5, 10, 25, 50, 100, 250)

canton_lri_map_1990 <- ggplot(data=ecu_shp2_df_bind1_lri) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=`1990`), colour ="white", size= .0005) +
  geom_sf(data=ecu_shp1, alpha=0, color="white", size = 0.25) + 
  # scale_fill_viridis_c() +
  scale_fill_viridis_c(breaks = my_breaks_lri, labels = my_breaks_lri,
                       trans = scales::pseudo_log_trans(sigma = 0.5),
                       limits=c(3, 250),
                       oob=scales::squish) +
  coord_sf(xlim = c(-81.175, -75.13), ylim = c(-5.09, 1.493)) + 
  labs(fill = "under-5 LRI mortality rate") + 
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        # legend.position = c(0.83, 0.20),
        legend.position = "none",
        plot.title.position = "plot",
        plot.title = element_text(size=24))

canton_lri_map_1990_galapagos <- 
  ggplot(data=ecu_shp2_df_bind1_lri %>% 
           mutate(Galapagos = str_sub(ADM2_PCODE, 0, 4)) %>%
           filter(Galapagos == "EC20")) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=`1990`), colour ="white", size= .0005) +
  scale_fill_viridis_c(breaks = my_breaks_lri, labels = my_breaks_lri, 
                       trans = scales::pseudo_log_trans(sigma = 0.5),
                       limits=c(3, 250),
                       oob=scales::squish) + 
  labs(fill = "% primary clean fuel use") +
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        legend.position = "none",
        plot.title.position = "plot",
        plot.title = element_text(size=24))


canton_lri_map_1990_full <- ggdraw() +
  draw_plot(canton_lri_map_1990, x = 0, y = 0, width = 1, height = 1) +
  draw_plot(canton_lri_map_1990_galapagos, x = 0.55, y = 0, width = 0.40, height = .35)

canton_lri_map_2001 <- ggplot(data=ecu_shp2_df_bind1_lri) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=`2001`), colour ="white", size= .0005) +
  geom_sf(data=ecu_shp1, alpha=0, color="white", size = 0.25) + 
  scale_fill_viridis_c(breaks = my_breaks_lri, labels = my_breaks_lri, 
                       trans = scales::pseudo_log_trans(sigma = 0.5),
                       limits=c(3, 250),
                       oob=scales::squish) + 
  coord_sf(xlim = c(-81.175, -75.13), ylim = c(-5.09, 1.493)) + 
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        # legend.position = c(0.83, 0.20),
        legend.position = "none",
        plot.title.position = "plot",
        plot.title = element_text(size=24))

canton_lri_map_2001_galapagos <- ggplot(data=ecu_shp2_df_bind1_lri %>% 
                                          mutate(Galapagos = str_sub(ADM2_PCODE, 0, 4)) %>% 
                                          filter(Galapagos == "EC20")) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=`2001`), colour ="white", size= .0005) +
  scale_fill_viridis_c(breaks = my_breaks_lri, labels = my_breaks_lri, 
                       trans = scales::pseudo_log_trans(sigma = 0.5),
                       limits=c(3, 250),
                       oob=scales::squish) + 
  # coord_fixed() + 
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        # legend.position = c(0.83, 0.20),
        legend.position = "none",
        plot.title.position = "plot",
        plot.title = element_text(size=24))


canton_lri_map_2001_full <- ggdraw() +
  draw_plot(canton_lri_map_2001, x = 0, y = 0, width = 1, height = 1) +
  draw_plot(canton_lri_map_2001_galapagos, x = 0.55, y = 0, width = 0.40, height = .35)


canton_lri_map_2010 <- ggplot(data=ecu_shp2_df_bind1_lri) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=`2010`), colour ="white", size= .0005) +
  geom_sf(data=ecu_shp1, alpha=0, color="white", size = 0.25) + 
  scale_fill_viridis_c(breaks = my_breaks_lri, labels = my_breaks_lri, 
                       trans = scales::pseudo_log_trans(sigma = 0.5),
                       limits=c(3, 250),
                       oob=scales::squish) + 
  coord_sf(xlim = c(-81.175, -75.13), ylim = c(-5.09, 1.493)) + 
  labs(fill = "% primary clean fuel use") +
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        # legend.position = c(0.83, 0.20),
        legend.position = "none",
        plot.title.position = "plot",
        plot.title = element_text(size=24))

canton_lri_map_2010_galapagos <- ggplot(data=ecu_shp2_df_bind1_lri %>% 
                                          mutate(Galapagos = str_sub(ADM2_PCODE, 0, 4)) %>% 
                                          filter(Galapagos == "EC20")) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=`2010`), colour ="white", size= .0005) +
  scale_fill_viridis_c(breaks = my_breaks_lri, labels = my_breaks_lri, 
                       trans = scales::pseudo_log_trans(sigma = 0.5),
                       limits=c(3, 250),
                       oob=scales::squish) + 
  # coord_fixed() + 
  labs(fill = "% primary clean fuel use") +
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        # legend.position = c(0.83, 0.20),
        legend.position = "none",
        plot.title.position = "plot",
        plot.title = element_text(size=24))


canton_lri_map_2010_full <- ggdraw() +
  draw_plot(canton_lri_map_2010, x = 0, y = 0, width = 1, height = 1) +
  draw_plot(canton_lri_map_2010_galapagos, x = 0.55, y = 0, width = 0.40, height = .35)


canton_lri_map_2015 <- ggplot(data=ecu_shp2_df_bind1_lri) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=`2015-2019`), colour ="white", size= .0005) +
  geom_sf(data=ecu_shp1, alpha=0, color="white", size = 0.25) + 
  scale_fill_viridis_c(breaks = my_breaks_lri, labels = my_breaks_lri, 
                       trans = scales::pseudo_log_trans(sigma = 0.5),
                       limits=c(3, 250),
                       oob=scales::squish) + 
  coord_sf(xlim = c(-81.175, -75.13), ylim = c(-5.09, 1.493)) + 
  labs(fill = "% primary clean fuel use") +
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        # legend.position = c(0.83, 0.20),
        legend.position = "none",
        plot.title.position = "plot",
        plot.title = element_text(size=24))

canton_lri_map_2015_galapagos <- ggplot(data=ecu_shp2_df_bind1_lri %>% 
                                          mutate(Galapagos = str_sub(ADM2_PCODE, 0, 4)) %>% 
                                          filter(Galapagos == "EC20")) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=`2015-2019`), colour ="white", size= .0005) +
  scale_fill_viridis_c(breaks = my_breaks_lri, labels = my_breaks_lri, 
                       trans = scales::pseudo_log_trans(sigma = 0.5),
                       limits=c(3, 250),
                       oob=scales::squish) + 
  # coord_fixed() + 
  labs(fill = "% primary clean fuel use") +
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        # legend.position = c(0.83, 0.20),
        legend.position = "none",
        plot.title.position = "plot",
        plot.title = element_text(size=24))


canton_lri_map_2015_full <- ggdraw() +
  draw_plot(canton_lri_map_2015, x = 0, y = 0, width = 1, height = 1) +
  draw_plot(canton_lri_map_2015_galapagos, x = 0.55, y = 0, width = 0.40, height = .35)


lri_map_legend0 <- 
  ggplot(data=ecu_shp2_df_bind1_lri) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=`2015-2019`), colour ="white", size= .0005) +
  geom_sf(data=ecu_shp1, alpha=0, color="white", size = 0.25) + 
  scale_fill_viridis_c(breaks = my_breaks_lri, labels = my_breaks_lri, 
                       trans = scales::pseudo_log_trans(sigma = 0.5),
                       limits=c(3, 250),
                       oob=scales::squish) + 
  coord_sf(xlim = c(-81.175, -75.13), ylim = c(-5.09, 1.493)) + 
  labs(fill = "under-5 LRI\nmortality rate per\n100k under-5\npopulation") +
  theme(legend.title = element_text(size=8),
        legend.text = element_text(size=8),
        # legend.position = c(0.83, 0.20),
        legend.position = "bottom",
        legend.key.height = unit(0.5, 'cm'),
        legend.key.width = unit(0.70, 'cm'),
        plot.title.position = "plot",
        plot.title = element_text(size=24))
lri_map_legend <- get_legend(
  lri_map_legend0
)

lri_map_full <- plot_grid(
  
  canton_lri_map_1990_full,
  canton_lri_map_2001_full,
  canton_lri_map_2010_full,
  canton_lri_map_2015_full,
  lri_map_legend,
  nrow=1,
  rel_widths=c(1, 1, 1, 1, 0.6),
  labels = c("1988-1992", "1999-2003", "2008-2012", "2015-2019", ""),
  label_size = 12,
  # label_fontface = "bold",
  label_y =  0.95,
  label_x = c(0, 0, 0, -0.15, 0)
  
)


# under 5 population --------


ecu_shp2_centroids <- 
  ecu_shp2_df_bind %>% 
  sf::st_centroid()

ecu_shp2_df_bind1 <- 
  ecu_shp2_df_bind %>% 
  left_join(full_df %>% 
              dplyr::select(canton_id_universal, period, under5population) %>% 
              mutate(ADM2_PCODE = paste0("EC", str_sub(canton_id_universal, 0, 2), str_sub(canton_id_universal, 4, 5))) %>% 
              pivot_wider(names_from = period, values_from = c(under5population)), by ="ADM2_PCODE")

my_breaks <- c(1000, 10000, 50000, 250000)

canton_under5pop_map_1990 <- ggplot(data=ecu_shp2_df_bind1) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=`1990`), colour ="white", size= .0005) +
  geom_sf(data=ecu_shp1, alpha=0, color="white", size = 0.25) + 
  # scale_fill_viridis_c() +
  scale_fill_viridis_c(breaks = my_breaks, labels = my_breaks,
                       trans = scales::pseudo_log_trans(sigma = 0.5),
                       limits=c(1000, 250000),
                       oob=scales::squish) +
  coord_sf(xlim = c(-81.175, -75.13), ylim = c(-5.09, 1.493)) + 
  labs(fill = "under 5 population") + 
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        # legend.position = c(0.83, 0.20),
        legend.position = "none",
        plot.title.position = "plot",
        plot.title = element_text(size=24))

canton_under5pop_map_1990_galapagos <- ggplot(data=ecu_shp2_df_bind1 %>% 
                                                mutate(Galapagos = str_sub(ADM2_PCODE, 0, 4)) %>% 
                                                filter(Galapagos == "EC20")) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=`1990`), colour ="white", size= .0005) +
  scale_fill_viridis_c(breaks = my_breaks, labels = my_breaks,
                       trans = scales::pseudo_log_trans(sigma = 0.5),
                       limits=c(1000, 250000),
                       oob=scales::squish) +
  coord_sf(clip="off") + 
  labs(fill = "under 5 population") +
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        legend.position = "none",
        plot.title.position = "plot",
        plot.title = element_text(size=24))


canton_under5pop_map_1990_full <- ggdraw() +
  draw_plot(canton_under5pop_map_1990, x = 0, y = 0, width = 1, height = 1) +
  draw_plot(canton_under5pop_map_1990_galapagos, x = 0.55, y = 0, width = 0.40, height = .35)



canton_under5pop_map_2001 <- ggplot(data=ecu_shp2_df_bind1) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=`2001`), colour ="white", size= .0005) +
  geom_sf(data=ecu_shp1, alpha=0, color="white", size = 0.25) + 
  scale_fill_viridis_c(breaks = my_breaks, labels = my_breaks,
                       trans = scales::pseudo_log_trans(sigma = 0.5),
                       limits=c(1000, 250000),
                       oob=scales::squish) +
  labs(fill = "under 5 population") + 
  coord_sf(xlim = c(-81.175, -75.13), ylim = c(-5.09, 1.493)) + 
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        # legend.position = c(0.83, 0.20),
        legend.position = "none",
        plot.title.position = "plot",
        plot.title = element_text(size=24))

canton_under5pop_map_2001_galapagos <- ggplot(data=ecu_shp2_df_bind1 %>% 
                                                mutate(Galapagos = str_sub(ADM2_PCODE, 0, 4)) %>% 
                                                filter(Galapagos == "EC20")) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=`2001`), colour ="white", size= .0005) +
  scale_fill_viridis_c(breaks = my_breaks, labels = my_breaks,
                       trans = scales::pseudo_log_trans(sigma = 0.5),
                       limits=c(1000, 250000),
                       oob=scales::squish) +
  labs(fill = "under 5 population") +
  coord_sf(clip="off") + 
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        # legend.position = c(0.83, 0.20),
        legend.position = "none",
        plot.title.position = "plot",
        plot.title = element_text(size=24))


canton_under5pop_map_2001_full <- ggdraw() +
  draw_plot(canton_under5pop_map_2001, x = 0, y = 0, width = 1, height = 1) +
  draw_plot(canton_under5pop_map_2001_galapagos, x = 0.55, y = 0, width = 0.40, height = .35)


canton_under5pop_map_2010 <- ggplot(data=ecu_shp2_df_bind1) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=`2010`), colour ="white", size= .0005) +
  geom_sf(data=ecu_shp1, alpha=0, color="white", size = 0.25) + 
  scale_fill_viridis_c(breaks = my_breaks, labels = my_breaks,
                       trans = scales::pseudo_log_trans(sigma = 0.5),
                       limits=c(1000, 250000),
                       oob=scales::squish) +
  labs(fill = "under 5 population") +
  coord_sf(xlim = c(-81.175, -75.13), ylim = c(-5.09, 1.493)) + 
  # labs(fill = "% primary clean fuel use") +
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        # legend.position = c(0.83, 0.20),
        legend.position = "none",
        plot.title.position = "plot",
        plot.title = element_text(size=24))

canton_under5pop_map_2010_galapagos <- ggplot(data=ecu_shp2_df_bind1 %>% 
                                                mutate(Galapagos = str_sub(ADM2_PCODE, 0, 4)) %>% 
                                                filter(Galapagos == "EC20")) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=`2010`), colour ="white", size= .0005) +
  scale_fill_viridis_c(breaks = my_breaks, labels = my_breaks,
                       trans = scales::pseudo_log_trans(sigma = 0.5),
                       limits=c(1000, 250000),
                       oob=scales::squish) +
  labs(fill = "under 5 population") +
  # coord_fixed() + 
  # labs(fill = "% primary clean fuel use") +
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        # legend.position = c(0.83, 0.20),
        legend.position = "none",
        plot.title.position = "plot",
        plot.title = element_text(size=24))


canton_under5pop_map_2010_full <- ggdraw() +
  draw_plot(canton_under5pop_map_2010, x = 0, y = 0, width = 1, height = 1) +
  draw_plot(canton_under5pop_map_2010_galapagos, x = 0.55, y = 0, width = 0.40, height = .35)


canton_under5pop_map_2015 <- 
  ggplot(data=ecu_shp2_df_bind1) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=`2015-2019`), colour ="white", size= .0005) +
  geom_sf(data=ecu_shp1, alpha=0, color="white", size = 0.25) + 
  scale_fill_viridis_c(breaks = my_breaks, labels = my_breaks,
                       trans = scales::pseudo_log_trans(sigma = 0.5),
                       limits=c(1000, 250000),
                       oob=scales::squish) +
  labs(fill = "under 5 population") +
  coord_sf(xlim = c(-81.175, -75.13), ylim = c(-5.09, 1.493)) + 
  # labs(fill = "% primary clean fuel use") +
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        # legend.position = c(0.83, 0.20),
        legend.position = "none",
        plot.title.position = "plot",
        plot.title = element_text(size=24))

canton_under5pop_map_2015_galapagos <- ggplot(data=ecu_shp2_df_bind1 %>% 
                                                mutate(Galapagos = str_sub(ADM2_PCODE, 0, 4)) %>% 
                                                filter(Galapagos == "EC20")) + 
  ggmap::theme_nothing() +
  coord_sf(clip="off") + 
  geom_sf(aes(fill=`2015-2019`), colour ="white", size= .0005) +
  scale_fill_viridis_c(breaks = my_breaks, labels = my_breaks,
                       trans = scales::pseudo_log_trans(sigma = 0.5),
                       limits=c(1000, 250000),
                       oob=scales::squish) +
  labs(fill = "under 5 population") +
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        # legend.position = c(0.83, 0.20),
        legend.position = "none",
        plot.title.position = "plot",
        plot.title = element_text(size=24))


canton_under5pop_map_2015_full <- ggdraw() +
  draw_plot(canton_under5pop_map_2015, x = 0, y = 0, width = 1, height = 1) +
  draw_plot(canton_under5pop_map_2015_galapagos, x = 0.55, y = 0, width = 0.40, height = .35)


under5pop_map_legend <- get_legend(
  ggplot(data=ecu_shp2_df_bind1 %>% 
           mutate(Galapagos = str_sub(ADM2_PCODE, 0, 4)) %>% 
           filter(Galapagos == "EC20")) + 
    ggmap::theme_nothing() +
    coord_sf(clip="off") + 
    geom_sf(aes(fill=`2015-2019`), colour ="white", size= .0005) +
    scale_fill_viridis_c(breaks = my_breaks, labels = my_breaks,
                         trans = scales::pseudo_log_trans(sigma = 0.5),
                         limits=c(1000, 250000),
                         oob=scales::squish) +
    labs(fill = "under-5\npopulation  ") +
    theme(legend.title = element_text(size=9),
          legend.text = element_text(size=7),
          legend.position = "bottom",
          legend.key.height = unit(0.5, 'cm'),
          legend.key.width = unit(0.75, 'cm'))
)


# Combined figures ------

# Combine and label with periods
cf_map_full <- plot_grid(
  
  canton_cf_map_1990_full,
  canton_cf_map_2001_full,
  canton_cf_map_2010_full,
  canton_cf_map_2015_full,
  cf_map_legend,
  nrow=5,
  rel_heights=c(1, 1, 1, 1, 0.6),
  labels = c("1988-1992", "1999-2003", "2008-2012", "2015-2019", ""),
  label_size = 10,
  # label_fontface = "bold",
  label_y =  0.98,
  label_x = c(-0.08, -0.08, -0.08, -0.08, 0)
  
)

# Combine and label with periods
lri_map_full <- plot_grid(
  
  canton_lri_map_1990_full,
  canton_lri_map_2001_full,
  canton_lri_map_2010_full,
  canton_lri_map_2015_full,
  lri_map_legend,
  nrow=5,
  rel_heights=c(1, 1, 1, 1, 0.6),
  labels = c("1988-1992", "1999-2003", "2008-2012", "2015-2019", ""),
  label_size = 10,
  # label_fontface = "bold",
  label_y =  0.98,
  label_x = c(-0.08, -0.08, -0.08, -0.08, 0)
  
)


# Combine and label with periods
under5pop_map_full <- plot_grid(
  
  canton_under5pop_map_1990_full,
  canton_under5pop_map_2001_full,
  canton_under5pop_map_2010_full,
  canton_under5pop_map_2015_full,
  under5pop_map_legend,
  nrow=5,
  rel_heights=c(1, 1, 1, 1, 0.6),
  labels = c("1988-1992", "1999-2003", "2008-2012", "2015-2019", ""),
  label_size = 10,
  # label_fontface = "bold",
  label_y =  0.98,
  label_x = c(-0.08, -0.08, -0.08, -0.08, 0)
  
)


# Combine and label with titles
figure1_map_combined_full <- 
  plot_grid(
    NULL, NULL, NULL, 
    cf_map_full,
    lri_map_full,
    under5pop_map_full,
    nrow=2,
    rel_heights=c(0.025, 1),
    align="hv",
    labels=c("Clean fuel use", "under-5 LRI mortality", "under-5 population"),
    label_size=12,
    label_x=c(0, -0.1, -0.05)
  )


# Save
# cowplot::ggsave2("~/Desktop/Figure1.pdf",
#                  figure1_map_combined_full,
#                  height = 254,
#                  width = 190.5,
#                  units = "mm",
#                  dpi = 600)



# Main linear regressions ---
alri5_glm_1_mod <- fixest::feglm(ALRI5_5_plus ~
                                   offset(log(under5population)) + 
                                   cf10 + # % CF per 10 pp increase
                                   percent_rural_corrected_ratio + 
                                   electricidad_no + 
                                   materials_index + 
                                   elimina_servidas_red_pozo_letrina +
                                   Literate_women +
                                   InSchool_girls + 
                                   Idioma_indigena_self +
                                   pcv3_use + 
                                   vaccine_index + 
                                   ANC_use + 
                                   ANC_visits_median_use 
                                 |
                                   as.factor(canton_id_universal) +
                                   as.factor(period),
                                 data=full_df,
                                 family = quasipoisson())

tidy(alri5_glm_1_mod,conf.int = T)

# Breakpoint analysis ------

alri5_glm_1_mod_temp <- glm(ALRI5_5_plus ~
                           offset(log(under5population)) + 
                           cf10 + # % CF per 10 pp increase
                           percent_rural_corrected_ratio + 
                           electricidad_no + 
                           materials_index + 
                           elimina_servidas_red_pozo_letrina +
                           Literate_women +
                           InSchool_girls + 
                           Idioma_indigena_self +
                           pcv3_use + 
                           vaccine_index + 
                           ANC_use + 
                           ANC_visits_median_use +
                           as.factor(canton_id_universal) +
                           as.factor(period)
                         ,
                         data=full_df,
                         family = quasipoisson())


glm1_segment <- segmented::segmented(alri5_glm_1_mod_temp,
                                     seg.Z = ~ cf10,
                                     psi=c(5))
summary(glm1_segment)

# getting CIs
(6.135 + (1.96*0.454)) / 10
(6.135 - (1.96*0.454)) / 10

# Main GAM ------
alri5_gam_1_mod <- gam(ALRI5_5_plus ~
                         offset(log(under5population)) + 
                         s(cf,k=4,fx=T) + 
                         percent_rural_corrected_ratio + 
                         electricidad_no + 
                         materials_index + 
                         elimina_servidas_red_pozo_letrina +
                         Literate_women +
                         InSchool_girls + 
                         Idioma_indigena_self +
                         pcv3_use + 
                         vaccine_index + 
                         ANC_use + 
                         ANC_visits_median_use +
                         as.factor(canton_id_universal) +
                         as.factor(period)
                       ,
                       data=full_df,
                       family = quasipoisson())


plot(alri5_gam_1_mod)

ad.cb <- crossbasis(
  # the column with the exposure 
  x=full_df$cf,         
  # The number of lags
  # the 0th lag is the same day. 
  lag=0,    
  # the functional form of the constraint on the exposure dimension
  argvar=list(fun="bs", df = 3)) # I use the df I found in the test model (above)



## model using crossbasis

alri5_gam_1_mod_cb <- gam(ALRI5_5_plus ~
                            offset(log(under5population)) + 
                           ad.cb + 
                            percent_rural_corrected_ratio + 
                            electricidad_no + 
                            materials_index + 
                            elimina_servidas_red_pozo_letrina +
                            Literate_women +
                            InSchool_girls + 
                            Idioma_indigena_self +
                            pcv3_use + 
                            vaccine_index + 
                            ANC_use + 
                            ANC_visits_median_use +
                            as.factor(canton_id_universal) +
                            as.factor(period)
                          ,
                          data=full_df,
                          family = quasipoisson())

# Relative to the mean ------
pred.est <- crosspred(
  # the exposure crossbasis
  basis = ad.cb,   
  # the model
  model = alri5_gam_1_mod_cb, 
  model.link = "log", # in my case is log because my model is a quasipoisson
  at = full_df$cf, # exposure
  bylag = .2474156, # estimate association in increments of x. You can chose whatever increment you have. A good increment value would be the SD
  cen = 0.7134 # this is where your specify your reference point, which in our case should be the exposure mean
) 


# Extract fit -------
fit <- as.data.frame(pred.est$matRRfit)  
colnames(fit) <- paste0("rr_", colnames(fit))
fit <- fit %>% mutate(CF = as.numeric(row.names(fit)))

# Extract 95% CI   -------
lci <- as.data.frame(pred.est$matRRlow)  
colnames(lci) <- paste0("lci_", colnames(lci))

uci <- as.data.frame(pred.est$matRRhigh)  
colnames(uci) <- paste0("uci_", colnames(uci))

# Combine fit and 95% CI -------
table_name_mean <- as.data.frame(bind_cols(fit, lci, uci)) 


# Estimate annotations -----
or_object_45_55 <- or_gam(
  data = full_df, model = alri5_gam_1_mod,
  pred = "cf", values = c(.45, .55),
)

or_object_45_55

or_object_75_85 <- or_gam(
  data = full_df, model = alri5_gam_1_mod,
  pred = "cf", values = c(0.75, 0.85),
)

or_object_75_85


# Make main figure -------
crossbasis_fig_main <- 
  ggplot(table_name_mean, aes(x=CF, y=rr_lag0, ymin=lci_lag0, ymax=uci_lag0)) + 
  geom_ribbon(alpha=0.1, fill="dodgerblue3", color=NA) + 
  geom_line(size=0.8, color="dodgerblue4") + 
  geom_hline(yintercept = 1, size=0.5, color="grey20", linetype=3) + 
  theme_bw() + 
  geom_vline(xintercept=0.61, linetype=2, size=0.8) +
  geom_label(aes(x = 0.725, y = 1.7, label = "Threshold: 61%\n(95% CI, 52%-70%)"),
             label.size = NA, fill = "white") +
  geom_segment(aes(x=0.45, xend=0.45, y=1.27, yend=1.32),
               color="black") +
  geom_segment(aes(x=0.55, xend=0.55, y=1.21, yend=1.26),
               color="black") +
  geom_segment(aes(x=0.46, xend=0.54, y=1.31, yend=1.26),
               color="black",
               arrow = arrow(length = unit(2, "mm"))) +
  annotate("text", x=0.43, y=1.35, label = "45%", size=4) +
  annotate("text", x=.575, y=1.27, label = "55%", size=4) +
  annotate("text", x=0.50, y=1.58, label = "MRR: 0.95\n(95% CI, 0.86-1.04)") +
  geom_segment(aes(x=0.75, xend=0.75, y=.92, yend=.97),
               color="black") +
  geom_segment(aes(x=.85, xend=.85, y=.76, yend=.81),
               color="black") +
  geom_segment(aes(x=0.76, xend=0.84, y=.96, yend=.83),
               color="black",
               arrow = arrow(length = unit(2, "mm"))) +
  annotate("text", x=0.74, y=.85, label = "75%", size=4) +
  annotate("text", x=0.86, y=.69, label = "85%", size=4) +
  annotate("text", x=0.80, y=1.13, label = "MRR: 0.82\n(95% CI, 0.79-0.85)") +
  scale_x_continuous(labels=scales::percent_format()) +
  scale_y_continuous(breaks=c(0.4, 0.6,  0.8, 1,
                              1.2, 1.4,  1.6, 1.8),
                     minor_breaks = c(0.5, 0.7, 0.9, 1.1, 1.3, 1.5, 1.7, 1.9)) +
  xlab("Clean fuel use") + 
  ylab("Mortality rate ratio relative to the Mean\n(71% CF)") + 
  theme(axis.text = element_text(size=12, family="Helvetica", color="black"),
        axis.title.x=element_blank(),
        legend.position = "none",
        legend.title = element_blank(),
        legend.text = element_text(size=10, family="Helvetica"),
        axis.title.y = element_text(size=13, family = "Helvetica", face="bold")) 

# Make histogram -------
histogram <- ggplot(full_df, aes(x=cf)) +
  geom_histogram(fill="dodgerblue4", alpha=0.8, color="black", bins = 50) + 
  theme_bw() +
  scale_x_continuous(labels=scales::percent_format()) + 
  xlab("Clean fuel use") + ylab("Count of cantons") +
  theme(axis.text = element_text(size=11, family="Helvetica"),
        axis.title = element_text(size=13, family = "Helvetica", face="bold")) 

# Combine figure -------
crossbasis_fig_main_histogram <- 
  cowplot::plot_grid(
  crossbasis_fig_main,
  histogram,
  align="hv",
  rel_heights = c(1, 0.5),
  nrow=2
)


cowplot::ggsave2("~/Desktop/Figure2.pdf",
                 crossbasis_fig_main_histogram,
                 width = 225,
                 height = 150,
                 units = "mm",
                 dpi = 600)


# Figure 3 ----------

# Estimate health benefits

# Process data to generate yearly values ----


predict_canton_accrual_full_int <-predict_canton_accrual %>% 
  left_join(full_df %>% 
              mutate(period = ifelse(period=="2015-2019", "2015", period)) %>% 
              dplyr::select(canton_id_universal, period,
                            under5population, cf, cf_1990,
                            percent_rural_corrected_ratio, electricidad_no,
                            materials_index, elimina_servidas_red_pozo_letrina,
                            Literate_women, InSchool_girls, Idioma_indigena_self,
                            pcv3_use, vaccine_index, #mage_mean, 
                            ANC_use, ANC_visits_median_use,
                            obtiene_agua_redpublica_tuberia_adentro_use), by=c("canton_id_universal", "period")) %>% 
  mutate(period = as.numeric(period)) %>% 
  group_by(canton_id_universal) %>% 
  mutate(under5population = zoo::na.approx(under5population, na.rm=F),
         cf = zoo::na.approx(cf, na.rm=F),
         cf_1990 = zoo::na.approx(cf_1990, na.rm=F),
         percent_rural_corrected_ratio = zoo::na.approx(percent_rural_corrected_ratio, na.rm=F),
         electricidad_no = zoo::na.approx(electricidad_no, na.rm=F),
         materials_index = zoo::na.approx(materials_index, na.rm=F),
         elimina_servidas_red_pozo_letrina = zoo::na.approx(elimina_servidas_red_pozo_letrina, na.rm=F),
         Literate_women = zoo::na.approx(Literate_women, na.rm=F),
         InSchool_girls = zoo::na.approx(InSchool_girls, na.rm=F),
         Idioma_indigena_self = zoo::na.approx(Idioma_indigena_self, na.rm=F),
         pcv3_use = zoo::na.approx(pcv3_use, na.rm=F),
         vaccine_index = zoo::na.approx(vaccine_index, na.rm=F),
         # mage_mean = zoo::na.approx(mage_mean, na.rm=F),
         ANC_use = zoo::na.approx(ANC_use, na.rm=F),
         ANC_visits_median_use = zoo::na.approx(ANC_visits_median_use, na.rm=F),
         obtiene_agua_redpublica_tuberia_adentro_use = zoo::na.approx(obtiene_agua_redpublica_tuberia_adentro_use, na.rm=F)) %>% 
  group_by(canton_id_universal) %>% 
  mutate(under5population = zoo::na.locf0(under5population),
         cf = zoo::na.locf0(cf),
         cf_1990 = zoo::na.locf0(cf_1990),
         percent_rural_corrected_ratio = zoo::na.locf0(percent_rural_corrected_ratio),
         electricidad_no = zoo::na.locf0(electricidad_no),
         materials_index = zoo::na.locf0(materials_index),
         elimina_servidas_red_pozo_letrina = zoo::na.locf0(elimina_servidas_red_pozo_letrina),
         Literate_women = zoo::na.locf0(Literate_women),
         InSchool_girls = zoo::na.locf0(InSchool_girls),
         Idioma_indigena_self = zoo::na.locf0(Idioma_indigena_self),
         pcv3_use = zoo::na.locf0(pcv3_use),
         vaccine_index = zoo::na.locf0(vaccine_index),
         # mage_mean = zoo::na.locf0(mage_mean),
         ANC_use = zoo::na.locf0(ANC_use),
         ANC_visits_median_use = zoo::na.locf0(ANC_visits_median_use),
         obtiene_agua_redpublica_tuberia_adentro_use = zoo::na.locf0(obtiene_agua_redpublica_tuberia_adentro_use))

predict_canton_accrual_full_int_1990 <- predict_canton_accrual_full_int %>% 
  dplyr::select(-cf) %>% 
  rename(cf = cf_1990) %>%
  mutate(year = period) %>% 
  mutate(period = ifelse(year>=1990 & year <= 1996, "1990", 
                         ifelse(year>=1997 & year <=2006, "2001",
                                ifelse(year>=2007 & year<=2013, "2010", "2015-2019"))))

predict_canton_accrual_full_int1 <- predict_canton_accrual_full_int %>% 
  mutate(year = period) %>% 
  mutate(period = ifelse(year>=1990 & year <= 1996, "1990", 
                         ifelse(year>=1997 & year <=2006, "2001",
                                ifelse(year>=2007 & year<=2013, "2010", "2015-2019"))))



predict_canton_same <- matrix(NA, length(unique(full_df$canton_id_universal))*4, 3) %>% 
  as_tibble() %>% 
  rename(canton_id_universal = V1,
         period = V2,
         ALRI5_5_plus = V3) %>% 
  mutate(canton_id_universal = rep(unique(full_df$canton_id_universal), 4)) %>% 
  arrange(canton_id_universal) %>% 
  mutate(period = rep(c("1990", "2001", "2010", "2015-2019"), 169)) %>% 
  left_join(full_df, by = c("canton_id_universal", "period"))

predict_canton_1990 <- matrix(NA, length(unique(full_df$canton_id_universal))*4, 3) %>% 
  as_tibble() %>% 
  rename(canton_id_universal = V1,
         period = V2,
         ALRI5_5_plus = V3) %>% 
  mutate(canton_id_universal = rep(unique(full_df$canton_id_universal), 4)) %>% 
  arrange(canton_id_universal) %>% 
  mutate(period = rep(c("1990", "2001", "2010", "2015-2019"), 169)) %>% 
  left_join(full_df, by = c("canton_id_universal", "period")) %>% 
  dplyr::select(-cf) %>% 
  rename(cf = cf_1990)

predicted_canton_same <- predict(alri5_gam_1_mod, 
                                 newdata = predict_canton_accrual_full_int1, 
                                 type="response", se.fit=TRUE) %>% 
  as_tibble() %>% 
  bind_cols(predict_canton_accrual_full_int1 %>% 
              dplyr::select(canton_id_universal, period, year, cf)) 

predicted_canton_1990 <- predict(alri5_gam_1_mod, newdata = predict_canton_accrual_full_int_1990, type="response", se.fit=TRUE) %>% 
  as_tibble() %>% 
  bind_cols(predict_canton_accrual_full_int_1990 %>% 
              dplyr::select(canton_id_universal, period, cf, year, ALRI5_5_plus)) 

predicted_canton_full <- predicted_canton_same %>% 
  dplyr::select(canton_id_universal, period, year, fit, se.fit) %>% 
  rename(predicted_alri5_same = fit,
         predicted_alri5_same_se = se.fit) %>% 
  left_join(predicted_canton_1990 %>% 
              dplyr::select(canton_id_universal,period, year, fit, se.fit) %>% 
              rename(predicted_alri5_1990 = fit,
                     predicted_alri5_1990_se = se.fit), by = c("canton_id_universal", "year")) %>% 
  distinct() %>% 
  dplyr::select(canton_id_universal, year, 
                predicted_alri5_1990, predicted_alri5_1990_se,
                predicted_alri5_same, predicted_alri5_same_se,
                everything()) %>%
  mutate(predicted_alri5_same_low = predicted_alri5_same - 1.96*predicted_alri5_same_se,
         predicted_alri5_same_high = predicted_alri5_same + 1.96*predicted_alri5_same_se,
         predicted_alri5_1990_low = predicted_alri5_1990 - 1.96*predicted_alri5_1990_se,
         predicted_alri5_1990_high = predicted_alri5_1990 + 1.96*predicted_alri5_1990_se)

predicted_canton_accrue = predicted_canton_full %>% 
  mutate(predicted_error = predicted_alri5_1990_se + predicted_alri5_same_se) %>% 
  arrange(canton_id_universal, year) %>% 
  group_by(canton_id_universal) %>%
  mutate(averted_alri5 = predicted_alri5_1990 - predicted_alri5_same) %>% 
  mutate(averted_alri5_low = averted_alri5 - 1.96*predicted_error,
         averted_alri5_high = averted_alri5 + 1.96*predicted_error,
         averted_alri5_lowlow = predicted_alri5_1990_low - predicted_alri5_same_low,
         averted_alri5_highhigh = predicted_alri5_1990_high - predicted_alri5_same_high)


predict_canton_accrual <- 
  matrix(NA, length(unique(full_df$canton_id_universal))*30, 3) %>% 
  as_tibble() %>% 
  rename(canton_id_universal = V1,
         period = V2,
         ALRI5_5_plus = V3) %>% 
  mutate(canton_id_universal = rep(unique(full_df$canton_id_universal), 30)) %>% 
  arrange(canton_id_universal) %>% 
  mutate(period = rep(1990:2019, 169)) %>% 
  mutate(period = as.character(period))

# Counterfactual: no changes in %CF or covariates, present canton-year population --------

predict_canton_accrual_full_int_counterfactual <- predict_canton_accrual_full_int1 %>% 
  mutate(percent_rural_corrected_ratio_1990 = ifelse(period=="1990", percent_rural_corrected_ratio, NA),
         electricidad_no_1990 = ifelse(period=="1990", electricidad_no, NA),
         materials_index_1990 = ifelse(period=="1990", materials_index, NA),
         elimina_servidas_red_pozo_letrina_1990 = ifelse(period=="1990", elimina_servidas_red_pozo_letrina, NA),
         Literate_women_1990 = ifelse(period=="1990", Literate_women, NA),
         InSchool_girls_1990 = ifelse(period=="1990", InSchool_girls, NA),
         Idioma_indigena_self_1990 = ifelse(period=="1990", Idioma_indigena_self, NA),
         pcv3_use_1990 = ifelse(period=="1990", pcv3_use, NA),
         vaccine_index_1990 = ifelse(period=="1990", vaccine_index, NA),
         # mage_mean_1990 = ifelse(period=="1990", mage_mean, NA),
         ANC_use_1990 = ifelse(period=="1990", ANC_use, NA),
         ANC_visits_median_use_1990 = ifelse(period=="1990", ANC_visits_median_use, NA),
         obtiene_agua_redpublica_tuberia_adentro_use_1990 = ifelse(period=="1990", obtiene_agua_redpublica_tuberia_adentro_use, NA)) %>% 
  mutate(percent_rural_corrected_ratio_1990 = min(percent_rural_corrected_ratio_1990, na.rm=T),
         electricidad_no_1990 = min(electricidad_no_1990, na.rm=T),
         materials_index_1990 = min(materials_index_1990, na.rm=T),
         elimina_servidas_red_pozo_letrina_1990 = min(elimina_servidas_red_pozo_letrina_1990, na.rm=T),
         Literate_women_1990 = min(Literate_women_1990, na.rm=T),
         InSchool_girls_1990 = min(InSchool_girls_1990, na.rm=T),
         Idioma_indigena_self_1990 = min(Idioma_indigena_self_1990, na.rm=T),
         pcv3_use_1990 = min(pcv3_use_1990, na.rm=T),
         vaccine_index_1990 = min(vaccine_index_1990, na.rm=T),
         # mage_mean_1990 = min(mage_mean_1990, na.rm=T),
         ANC_use_1990 = min(ANC_use_1990, na.rm=T),
         ANC_visits_median_use_1990 = min(ANC_visits_median_use_1990, na.rm=T),
         obtiene_agua_redpublica_tuberia_adentro_use_1990 = min(obtiene_agua_redpublica_tuberia_adentro_use_1990, na.rm=T)) %>% 
  dplyr::select(-percent_rural_corrected_ratio, -cf,
                -electricidad_no, -materials_index,
                -elimina_servidas_red_pozo_letrina, -Literate_women,
                -InSchool_girls, -Idioma_indigena_self,
                -pcv3_use, -vaccine_index,
                #-mage_mean, 
                -ANC_use, -ANC_visits_median_use,
                -obtiene_agua_redpublica_tuberia_adentro_use) %>% 
  dplyr::rename(percent_rural_corrected_ratio = percent_rural_corrected_ratio_1990, 
                cf = cf_1990,
                electricidad_no = electricidad_no_1990, 
                materials_index = materials_index_1990,
                elimina_servidas_red_pozo_letrina = elimina_servidas_red_pozo_letrina_1990, 
                Literate_women = Literate_women_1990,
                InSchool_girls = InSchool_girls_1990, 
                Idioma_indigena_self = Idioma_indigena_self_1990,
                pcv3_use = pcv3_use_1990, 
                vaccine_index = vaccine_index_1990,
                # mage_mean = mage_mean_1990,
                ANC_use = ANC_use_1990, 
                ANC_visits_median_use = ANC_visits_median_use_1990,
                obtiene_agua_redpublica_tuberia_adentro_use = obtiene_agua_redpublica_tuberia_adentro_use_1990) %>% 
  dplyr::select(-period) %>% 
  mutate(period = "1990")


# 10.1 Main model ---------

alri5_gam_1_mod

# new approach -------

predict_canton_same <- matrix(NA, length(unique(full_df$canton_id_universal))*4, 3) %>% 
  as_tibble() %>% 
  rename(canton_id_universal = V1,
         period = V2,
         ALRI5_5_plus = V3) %>% 
  mutate(canton_id_universal = rep(unique(full_df$canton_id_universal), 4)) %>% 
  arrange(canton_id_universal) %>% 
  mutate(period = rep(c("1990", "2001", "2010", "2015-2019"), 169)) %>% 
  left_join(full_df, by = c("canton_id_universal", "period"))

predict_canton_1990 <- matrix(NA, length(unique(full_df$canton_id_universal))*4, 3) %>% 
  as_tibble() %>% 
  rename(canton_id_universal = V1,
         period = V2,
         ALRI5_5_plus = V3) %>% 
  mutate(canton_id_universal = rep(unique(full_df$canton_id_universal), 4)) %>% 
  arrange(canton_id_universal) %>% 
  mutate(period = rep(c("1990", "2001", "2010", "2015-2019"), 169)) %>% 
  left_join(full_df, by = c("canton_id_universal", "period")) %>% 
  dplyr::select(-cf) %>% 
  rename(cf = cf_1990)

predicted_canton_same <- predict(alri5_gam_1_mod, newdata = predict_canton_accrual_full_int1, type="response", se.fit=TRUE) %>% 
  as_tibble() %>% 
  bind_cols(predict_canton_accrual_full_int1 %>% 
              dplyr::select(canton_id_universal, period, year, cf)) 

predicted_canton_1990 <- predict(alri5_gam_1_mod, newdata = predict_canton_accrual_full_int_1990, type="response", se.fit=TRUE) %>% 
  as_tibble() %>% 
  bind_cols(predict_canton_accrual_full_int_1990 %>% 
              dplyr::select(canton_id_universal, period, cf, year, ALRI5_5_plus)) 

predicted_canton_full <- predicted_canton_same %>% 
  dplyr::select(canton_id_universal, period, year, fit, se.fit) %>% 
  rename(predicted_alri5_same = fit,
         predicted_alri5_same_se = se.fit) %>% 
  left_join(predicted_canton_1990 %>% 
              dplyr::select(canton_id_universal,period, year, fit, se.fit) %>% 
              rename(predicted_alri5_1990 = fit,
                     predicted_alri5_1990_se = se.fit), by = c("canton_id_universal", "year")) %>% 
  distinct() %>% 
  dplyr::select(canton_id_universal, year, 
                predicted_alri5_1990, predicted_alri5_1990_se,
                predicted_alri5_same, predicted_alri5_same_se,
                everything()) %>%
  mutate(predicted_alri5_same_low = predicted_alri5_same - 1.96*predicted_alri5_same_se,
         predicted_alri5_same_high = predicted_alri5_same + 1.96*predicted_alri5_same_se,
         predicted_alri5_1990_low = predicted_alri5_1990 - 1.96*predicted_alri5_1990_se,
         predicted_alri5_1990_high = predicted_alri5_1990 + 1.96*predicted_alri5_1990_se)

predicted_canton_accrue_1 = predicted_canton_full %>% 
  mutate(predicted_error = predicted_alri5_1990_se + predicted_alri5_same_se) %>% 
  arrange(canton_id_universal, year) %>% 
  group_by(canton_id_universal) %>%
  mutate(averted_alri5 = predicted_alri5_1990 - predicted_alri5_same) %>% 
  mutate(averted_alri5_low = averted_alri5 - 1.96*predicted_error,
         averted_alri5_high = averted_alri5 + 1.96*predicted_error,
         averted_alri5_lowlow = predicted_alri5_1990_low - predicted_alri5_same_low,
         averted_alri5_highhigh = predicted_alri5_1990_high - predicted_alri5_same_high)

# summarizing health benefits -------

sum(predicted_canton_accrue_1$averted_alri5, na.rm=T)
sum(predicted_canton_accrue_1$averted_alri5_lowlow, na.rm=T)
sum(predicted_canton_accrue_1$averted_alri5_highhigh, na.rm=T)

predicted_canton_accrue_1 %>% 
  mutate(accrual_period = ifelse(year>=1990 & year<=2001, "1990 to 2001",
                                 ifelse(year>2001 & year<=2010, "2001 to 2010",
                                        ifelse(year>2010, "2010 to 2019", NA)))) %>% 
  group_by(accrual_period) %>% 
  summarize(averted_int = sum(averted_alri5, na.rm=T),
            averted_int_low = sum(averted_alri5_lowlow, na.rm = T),
            averted_int_high = sum(averted_alri5_highhigh, na.rm = T))

predicted_canton_accrual_summary <-  predicted_canton_accrue_1 %>% 
  dplyr::group_by(canton_id_universal) %>% 
  dplyr::summarize(averted_int = sum(averted_alri5, na.rm=T),
                   averted_int_low = sum(averted_alri5_lowlow, na.rm = T),
                   averted_int_high = sum(averted_alri5_highhigh, na.rm = T))


table(predicted_canton_accrual_summary$averted_int<0)
table(predicted_canton_accrual_summary$averted_int_low>0)
table(predicted_canton_accrual_summary$averted_int_high<0)

# percent averted ------

predicted_canton_counterfactual <- predict(alri5_gam_1_mod, newdata = 
                                             predict_canton_accrual_full_int_counterfactual, type="response", se.fit=TRUE) %>% 
  as_tibble() %>% 
  bind_cols(predict_canton_accrual_full_int_counterfactual %>% 
              dplyr::select(canton_id_universal, period, cf, year, ALRI5_5_plus)) 

# Total mortalities were nothing to have changed since 1990 except pop growth
sum(predicted_canton_counterfactual$fit) 
# Total mortalities were %CF to not have changed since 1990, but all else as observed
sum(predicted_canton_1990$fit) 
# Total mortalities using observed data
sum(predicted_canton_same$fit) 
# Percent averted
(sum(predicted_canton_1990$fit) - sum(predicted_canton_same$fit))/ (sum(predicted_canton_counterfactual$fit) - sum(predicted_canton_same$fit))

# making % averted figure ---------

canton_predicted_percent_averted_df <- 
  predicted_canton_same %>% 
  rename(same = fit) %>% 
  left_join(predicted_canton_counterfactual %>% 
              rename(counterfactual = fit), by = c("canton_id_universal", "year")) %>% 
  left_join(predicted_canton_1990 %>% 
              rename(ninety = fit), by = c("canton_id_universal",  "year")) %>% 
  mutate(diff_ninety_same = (ninety - same),
         diff_counter_same =  (counterfactual-same)) %>% 
  group_by(canton_id_universal) %>% 
  summarize(diff_ninety_same = sum(diff_ninety_same, na.rm=T),
            diff_counter_same = sum(diff_counter_same, na.rm=T)) %>% 
  mutate(percent_averted = diff_ninety_same / diff_counter_same)

ecu_shp2_df_bind_averted <- ecu_shp2_df_bind %>% 
  left_join(canton_predicted_percent_averted_df %>% 
              dplyr::select(canton_id_universal, percent_averted) %>% 
              mutate(ADM2_PCODE = paste0("EC", str_sub(canton_id_universal, 0, 2), str_sub(canton_id_universal, 4, 5))) %>% 
              dplyr::select(-canton_id_universal))

my_breaks <- c(-0.10, 0, 0.05, 0.10, 0.15, 0.20, 0.25, 0.3, 0.35, 0.4, 0.45, 0.5)
my_breaks <- c(0, 0.1, 0.2, 0.3, 0.4, 0.5)
my_labels = c("0%", "10%", "20%", "30%", "40%", "50%")
my_breaks_3 <- c(0, .05, 0.1, .15, 0.2,.25, 0.3)
my_labels_3 = c("0%", "5%", "10%", "15%", "20%", "25%", ">30%")

canton_averted_map <- ggplot(data=ecu_shp2_df_bind_averted) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=percent_averted), colour = "white", size=.005) +
  geom_sf(data=ecu_shp1, alpha=0, color="white", size = .25) + 
  scale_fill_viridis_c(
    breaks = my_breaks_3, labels = my_labels_3,
    limits=c(0, 0.3),
    oob=scales::squish
  ) +
  coord_sf(xlim = c(-81.175, -75.13), ylim = c(-5.09, 1.493)) + 
  labs(fill = "% of under-5 LRI mortality averted\nattributable to increased %CF") + 
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        # legend.position = c(0.83, 0.20),
        legend.position = "none",
        plot.title.position = "plot",
        plot.title = element_blank())

canton_averted_map_galapagos <- ggplot(data=ecu_shp2_df_bind_averted %>% 
                                         mutate(Galapagos = str_sub(ADM2_PCODE, 0, 4)) %>% 
                                         filter(Galapagos == "EC20")) + 
  ggmap::theme_nothing() +
  geom_sf(aes(fill=percent_averted), colour = "white", size=.005) +
  scale_fill_viridis_c(
    breaks = my_breaks_3, labels = my_labels_3,
    limits=c(0, 0.3),
    oob=scales::squish
  ) +
  labs(fill = "% of under-5 LRI mortality averted\nattributable to increased %CF") + 
  theme(legend.title = element_text(size=14),
        legend.text = element_text(size=14),
        legend.position = "none",
        plot.title.position = "plot",
        plot.title = element_blank())


canton_averted_map_full <- ggdraw() +
  draw_plot(canton_averted_map, x = 0, y = 0, width = 1, height = 1) +
  draw_plot(canton_averted_map_galapagos, x = 0.55, y = 0, width = 0.40, height = .35)

averted_map_legend <- get_legend(
  ggplot(data=ecu_shp2_df_bind_averted) + 
    theme_classic() +
    geom_sf(aes(fill=percent_averted), colour = "white", size=0.005) +
    geom_sf(data=ecu_shp1, alpha=0, color="white", size = 0.25) + 
    scale_fill_viridis_c(
      breaks = my_breaks_3, labels = my_labels_3,
      limits=c(0, 0.3),
      oob=scales::squish
    ) +
    theme(legend.title = element_blank(),
          legend.text = element_text(size=12),
          # legend.position = c(0.83, 0.20),
          legend.position = "top",
          legend.key.height = unit(0.6, 'cm'),
          legend.key.width = unit(1.8, 'cm'),
          plot.title.position = "plot",
          plot.title = element_text(size=24))
)

averted_map_full <- plot_grid(
  averted_map_legend,
  canton_averted_map_full,
  nrow=2, 
  rel_heights = c(0.3, 1)
)

# cowplot::ggsave2("~/Desktop/Figure3.pdf",
#                  averted_map_full,
#                  width = 150,
#                  height = 150,
#                  units = "mm",
#                  dpi = 600)

